/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.solr.search;

import org.apache.lucene.util.BitUtil;

/**
 * <code>HashDocSet</code> represents an unordered set of Lucene Document Ids
 * using an open-addressed primitive int hash table. It can be a better choice
 * than a bitset when the set contains relatively few documents, because it
 * takes up less memory and is faster to iterate over and to intersect.
 *
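 * <p>
 * A minimal usage sketch (illustrative only; the constructor and query
 * methods shown are those of this class, the values are made up):
 *
 * <pre>
 *   int[] ids = { 1, 5, 42 }; // unique lucene doc ids
 *   HashDocSet docs = new HashDocSet(ids, 0, ids.length);
 *   docs.exists(5);  // true
 *   docs.exists(7);  // false
 *   docs.size();     // 3
 * </pre>
 *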
 * @since solr 0.9
 */
public final class HashDocSet extends DocSetBase {
	/**
	 * Default load factor to use for HashDocSets. We keep track of the inverse
	 * since multiplication is faster than division. The default load factor is
	 * 0.75, i.e. an inverse of 1.0f / 0.75f.
	 */
	static float DEFAULT_INVERSE_LOAD_FACTOR = 1.0f / 0.75f;


	// Lucene docs are numbered from 0, so a negative number must be used as
	// the "missing" sentinel. An alternative to initializing the whole array
	// to EMPTY up front would be to store doc+1 in the table and use 0 as the
	// empty marker (not done here).
	private final static int EMPTY = -1;
	private final int[] table;
	private final int size;

	private final int mask;

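	/** Copy constructor: deep-copies the backing hash table. */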
	public HashDocSet(HashDocSet set) {
		this.table = set.table.clone();
		this.size = set.size;
		this.mask = set.mask;
	}

	/** Create a HashDocSet from a list of *unique* ids */
	public HashDocSet(int[] docs, int offset, int len) {
		this(docs, offset, len, DEFAULT_INVERSE_LOAD_FACTOR);
	}

	/** Create a HashDocSet from a list of *unique* ids */
	public HashDocSet(int[] docs, int offset, int len, float inverseLoadFactor) {
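		// Size the table to the smallest power of two whose load factor is at
		// or below the requested one. Hypothetical example with the default
		// factor of 0.75: len=100 gives nextPow2=128, and since
		// 128 < 100 * (1 / 0.75) the table is doubled to 256 (load ~0.39).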
		int tsize = Math.max(BitUtil.nextHighestPowerOfTwo(len), 1);
		if (tsize < len * inverseLoadFactor) {
			tsize <<= 1;
		}

		mask = tsize - 1;

		table = new int[tsize];
		// (for now) better than Arrays.fill(table, EMPTY); see
		// https://issues.apache.org/jira/browse/SOLR-390
		for (int i = tsize - 1; i >= 0; i--)
			table[i] = EMPTY;

		int end = offset + len;
		for (int i = offset; i < end; i++) {
			put(docs[i]);
		}

		size = len;
	}

	void put(int doc) {
		int s = doc & mask;
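		// Hypothetical worked example: with mask = 255 and doc = 300, the home
		// slot is 300 & 255 = 44 and the probe step is (300 >> 7) | 1 = 3, so
		// a collision chain would visit slots 47, 50, 53, ... (mod 256).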
		while (table[s] != EMPTY) {
			// Stepping through a power-of-two table by an odd increment is
			// guaranteed to visit every slot, so instead of re-hashing we jump
			// straight to a "linear" probe. The key is that the increment is
			// derived from the doc itself, giving tablesize/2 distinct probe
			// sequences. We rely on loop-invariant code motion to evaluate
			// ((doc >> 7) | 1) only once; otherwise we would need to hoist the
			// first probe out of the loop ourselves.
			s = (s + ((doc >> 7) | 1)) & mask;
		}
		table[s] = doc;
	}

	public boolean exists(int doc) {
		int s = doc & mask;
		for (;;) {
			int v = table[s];
			if (v == EMPTY)
				return false;
			if (v == doc)
				return true;
			// see put() for algorithm details.
			s = (s + ((doc >> 7) | 1)) & mask;
		}
	}

	public int size() {
		return size;
	}

	public DocIterator iterator() {
		return new DocIterator() {
			int pos = 0;
			{
				goNext();
			}

			public boolean hasNext() {
				return pos < table.length;
			}

			public Integer next() {
				return nextDoc();
			}

			public void remove() {
				// a HashDocSet is immutable once constructed
				throw new UnsupportedOperationException();
			}

			void goNext() {
				while (pos < table.length && table[pos] == EMPTY)
					pos++;
			}

			// modify to return -1 at end of iteration?
			public int nextDoc() {
				int doc = table[pos];
				pos++;
				goNext();
				return doc;
			}

			public float score() {
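				// a HashDocSet carries no score information, so report a
				// constant 0.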
				return 0.0f;
			}
		};
	}

	public long memSize() {
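		// 4 bytes per int slot in the table, plus a rough 20-byte allowance
		// for the object header and the size and mask fields.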
		return (table.length << 2) + 20;
	}

	@Override
	public DocSet intersection(DocSet other) {
		if (other instanceof HashDocSet) {
			// set "a" to the smaller doc set for the most efficient
			// intersection.
			final HashDocSet a = size() <= other.size() ? this
					: (HashDocSet) other;
			final HashDocSet b = size() <= other.size() ? (HashDocSet) other
					: this;

			int[] result = new int[a.size()];
			int resultCount = 0;
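			// scanning the smaller table and probing the larger one costs
			// min(|a|, |b|) hash lookups rather than max(|a|, |b|).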
			for (int i = 0; i < a.table.length; i++) {
				int id = a.table[i];
				if (id >= 0 && b.exists(id)) {
					result[resultCount++] = id;
				}
			}
			return new HashDocSet(result, 0, resultCount);

		} else {

			int[] result = new int[size()];
			int resultCount = 0;
			for (int i = 0; i < table.length; i++) {
				int id = table[i];
				if (id >= 0 && other.exists(id)) {
					result[resultCount++] = id;
				}
			}
			return new HashDocSet(result, 0, resultCount);
		}

	}

	@Override
	public int intersectionSize(DocSet other) {
		if (other instanceof HashDocSet) {
			// set "a" to the smaller doc set for the most efficient
			// intersection.
			final HashDocSet a = size() <= other.size() ? this
					: (HashDocSet) other;
			final HashDocSet b = size() <= other.size() ? (HashDocSet) other
					: this;

			int resultCount = 0;
			for (int i = 0; i < a.table.length; i++) {
				int id = a.table[i];
				if (id >= 0 && b.exists(id)) {
					resultCount++;
				}
			}
			return resultCount;
		} else {
			int resultCount = 0;
			for (int i = 0; i < table.length; i++) {
				int id = table[i];
				if (id >= 0 && other.exists(id)) {
					resultCount++;
				}
			}
			return resultCount;
		}

	}

	@Override
	public boolean intersects(DocSet other) {
		if (other instanceof HashDocSet) {
			// set "a" to the smaller doc set for the most efficient
			// intersection.
			final HashDocSet a = size() <= other.size() ? this
					: (HashDocSet) other;
			final HashDocSet b = size() <= other.size() ? (HashDocSet) other
					: this;

			for (int i = 0; i < a.table.length; i++) {
				int id = a.table[i];
				if (id >= 0 && b.exists(id)) {
					return true;
				}
			}
			return false;
		} else {
			for (int i = 0; i < table.length; i++) {
				int id = table[i];
				if (id >= 0 && other.exists(id)) {
					return true;
				}
			}
			return false;
		}
	}

	@Override
	public DocSet andNot(DocSet other) {
		int[] result = new int[size()];
		int resultCount = 0;

		for (int i = 0; i < table.length; i++) {
			int id = table[i];
			if (id >= 0 && !other.exists(id)) {
				result[resultCount++] = id;
			}
		}
		return new HashDocSet(result, 0, resultCount);
	}

	@Override
	public DocSet union(DocSet other) {
		if (other instanceof HashDocSet) {
			// set "a" to the smaller doc set
			final HashDocSet a = size() <= other.size() ? this
					: (HashDocSet) other;
			final HashDocSet b = size() <= other.size() ? (HashDocSet) other
					: this;

			int[] result = new int[a.size() + b.size()];
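			// a.size() + b.size() is an upper bound on the union size; ids
			// present in both sets are added only once below.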
			int resultCount = 0;
			// iterate over the larger table first, adding without checking.
			for (int i = 0; i < b.table.length; i++) {
				int id = b.table[i];
				if (id >= 0)
					result[resultCount++] = id;
			}

			// now iterate over the smaller set, adding everything not already
			// in the larger set.
			for (int i = 0; i < a.table.length; i++) {
				int id = a.table[i];
				if (id >= 0 && !b.exists(id))
					result[resultCount++] = id;
			}

			return new HashDocSet(result, 0, resultCount);
		} else {
			return other.union(this);
		}
	}

	@Override
	protected HashDocSet clone() {
		return new HashDocSet(this);
	}

	// andNotSize() and unionSize() are deliberately not implemented here; they
	// are inherited from DocSetBase, which defines them in terms of
	// intersectionSize().
}
